import sys
sys.path.append('..')
from data.model.metrics import TRAINING_METRICS, VALIDATION_METRICS, FEATURES_IMPORTANCES
from data.labeled.preprocessed import  RISKS_MAPPING as risks
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

Features selected by the final model, ranked by importance for each risk

# For every risk, print its human-readable name and draw a horizontal bar
# chart of the feature importances, smallest at the bottom of the sort order.
for risk, importance in FEATURES_IMPORTANCES.items():
    print("Risk:", risks[risk])
    plt.figure(figsize=(5, 5))
    # sort_values() sorts in ascending order by default.
    importance.sort_values().plot(kind="barh")
    plt.show()

Risk: Higher water prices

Risk: Inadequate or aging infrastructure

Risk: Increased water stress or scarcity

Risk: Declining water quality

Risk: Increased water demand

Risk: Regulatory

Risk: Energy supply issues

The following metrics were computed on the training set and on a separate validation set

Training Metrics

# Report the training metrics for every risk: a confusion-matrix heatmap
# followed by the regression and classification reports rendered as tables.
for risk,metrics in TRAINING_METRICS.items():
    print('Risk:', risks[risk])
    print("Confusion Matrix:")
    plt.figure()
    # fmt="g" annotates each cell with the plain number. seaborn's default
    # ".2g" keeps only two significant digits, so a count such as 133 would
    # be drawn as "1.3e+02".
    sns.heatmap(metrics['confusion_matrix'], annot=True, fmt="g")
    plt.show()
    print("Regression Report")
    # Wrapped in a list so the report becomes a single-row table.
    display(pd.DataFrame([metrics['regression_report']]))
    print("Classification Report")
    display(pd.DataFrame(metrics['classification_report']))

Risk: Higher water prices
Confusion Matrix:

Regression Report

Classification Report

Risk: Inadequate or aging infrastructure
Confusion Matrix:

Regression Report

Classification Report

Risk: Increased water stress or scarcity
Confusion Matrix:

Regression Report

Classification Report

Risk: Declining water quality
Confusion Matrix:

Validation Metrics

import seaborn as sns

# Report the validation metrics for every risk: a confusion-matrix heatmap
# followed by the regression and classification reports rendered as tables.
for risk,metrics in VALIDATION_METRICS.items():
    print('Risk:', risks[risk])
    print("Confusion Matrix:")
    plt.figure()
    # fmt="g" annotates each cell with the plain number. seaborn's default
    # ".2g" keeps only two significant digits, so a count of 100 or more
    # would be drawn in scientific notation (e.g. "1.3e+02").
    sns.heatmap(metrics['confusion_matrix'], annot=True, fmt="g")
    plt.show()
    print("Regression Report")
    # Wrapped in a list so the report becomes a single-row table.
    display(pd.DataFrame([metrics['regression_report']]))
    print("Classification Report")
    display(pd.DataFrame(metrics['classification_report']))

Risk: Higher water prices
Confusion Matrix:

Regression Report

Classification Report

Risk: Inadequate or aging infrastructure
Confusion Matrix:

Regression Report

Classification Report

Risk: Increased water stress or scarcity
Confusion Matrix:

Regression Report

Classification Report

Risk: Declining water quality
Confusion Matrix:

MSE per risk on the training set, with summary statistics (including the average)

# Collect the training MSE of every risk, keyed by the risk's readable name.
training_mse = pd.Series(
    {
        risks[risk_id]: report['regression_report']['MSE']
        for risk_id, report in TRAINING_METRICS.items()
    }
)
display(training_mse)
# Kept as the last expression so the notebook renders the summary statistics.
training_mse.describe()

Higher water prices                   0.581700
Inadequate or aging infrastructure    0.639898
Increased water stress or scarcity    0.249674
Declining water quality               0.671731
Increased water demand                0.555782
Regulatory                            0.258317
Energy supply issues                  0.415226
dtype: float64

count    7.000000
mean     0.481761
std      0.175490
min      0.249674
25%      0.336772
50%      0.555782
75%      0.610799
max      0.671731
dtype: float64

MSE per risk on the validation set, with summary statistics (including the average)

# Collect the validation MSE of every risk, keyed by the risk's readable name.
valid_mse = pd.Series(
    {
        risks[risk_id]: report['regression_report']['MSE']
        for risk_id, report in VALIDATION_METRICS.items()
    }
)
display(valid_mse)
# Kept as the last expression so the notebook renders the summary statistics.
valid_mse.describe()

Higher water prices                   0.835675
Inadequate or aging infrastructure    0.756456
Increased water stress or scarcity    0.287249
Declining water quality               0.753705
Increased water demand                0.983377
Regulatory                            0.665608
Energy supply issues                  0.784080
dtype: float64

count    7.000000
mean     0.723736
std      0.215814
min      0.287249
25%      0.709657
50%      0.756456
75%      0.809878
max      0.983377
dtype: float64

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	1.000000	0.163265	0.0	0.0	0.316667	0.290816	0.555102
recall	0.343750	1.000000	0.0	0.0	0.316667	0.335938	0.316667
f1-score	0.511628	0.280702	0.0	0.0	0.316667	0.198082	0.310295
support	32.000000	8.000000	18.0	2.0	0.316667	60.000000	60.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.025000	0.761905	0.0	0.475728	0.196726	0.526410
recall	0.0	0.200000	0.676056	0.0	0.475728	0.219014	0.475728
f1-score	0.0	0.044444	0.716418	0.0	0.475728	0.190216	0.495999
support	24.0	5.000000	71.000000	3.0	0.475728	103.000000	103.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.666667	0.743017	0.0	0.741758	0.352421	0.605245
recall	0.0	0.117647	1.000000	0.0	0.741758	0.279412	0.741758
f1-score	0.0	0.200000	0.852564	0.0	0.741758	0.263141	0.641709
support	2.0	17.000000	133.000000	30.0	0.741758	182.000000	182.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.137931	0.606061	0.0	0.5	0.185998	0.342868
recall	0.0	0.266667	0.869565	0.0	0.5	0.284058	0.500000
f1-score	0.0	0.181818	0.714286	0.0	0.5	0.224026	0.406351
support	26.0	15.000000	69.000000	18.0	0.5	128.000000	128.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	1.000000	0.225	0.666667	0.0	0.485294	0.472917	0.689951
recall	0.571429	0.900	0.320000	0.0	0.485294	0.447857	0.485294
f1-score	0.727273	0.360	0.432432	0.0	0.485294	0.379926	0.511389
support	28.000000	10.000	25.000000	5.0	0.485294	68.000000	68.000000

	0.0	1.0	2.0	accuracy	macro avg	weighted avg
precision	1.000000	0.222222	0.0	0.688889	0.407407	0.730864
recall	0.843750	1.000000	0.0	0.688889	0.614583	0.688889
f1-score	0.915254	0.363636	0.0	0.688889	0.426297	0.683171
support	32.000000	4.000000	9.0	0.688889	45.000000	45.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.971429	0.0	0.0	0.0	0.829268	0.242857	0.829268
recall	0.971429	0.0	0.0	0.0	0.829268	0.242857	0.829268
f1-score	0.971429	0.0	0.0	0.0	0.829268	0.242857	0.829268
support	35.000000	1.0	4.0	1.0	0.829268	41.000000	41.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.500000	0.160000	0.0	0.0	0.185185	0.165000	0.282963
recall	0.071429	1.000000	0.0	0.0	0.185185	0.267857	0.185185
f1-score	0.125000	0.275862	0.0	0.0	0.185185	0.100216	0.105683
support	14.000000	4.000000	8.0	1.0	0.185185	27.000000	27.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.052632	0.692308	0.0	0.422222	0.186235	0.480432
recall	0.0	0.333333	0.580645	0.0	0.422222	0.228495	0.422222
f1-score	0.0	0.090909	0.631579	0.0	0.422222	0.180622	0.441148
support	10.0	3.000000	31.000000	1.0	0.422222	45.000000	45.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.0	0.734177	0.0	0.734177	0.183544	0.539016
recall	0.0	0.0	1.000000	0.0	0.734177	0.250000	0.734177
f1-score	0.0	0.0	0.846715	0.0	0.734177	0.211679	0.621639
support	1.0	7.0	58.000000	13.0	0.734177	79.000000	79.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.0	0.058824	0.631579	0.0	0.454545	0.172601	0.351984
recall	0.0	0.142857	0.800000	0.0	0.454545	0.235714	0.454545
f1-score	0.0	0.083333	0.705882	0.0	0.454545	0.197304	0.395633
support	11.0	7.000000	30.000000	7.0	0.454545	55.000000	55.000000

	0.0	1.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.666667	0.190476	0.666667	0.0	0.333333	0.380952	0.558730
recall	0.333333	1.000000	0.166667	0.0	0.333333	0.375000	0.333333
f1-score	0.444444	0.320000	0.266667	0.0	0.333333	0.257778	0.327111
support	12.000000	4.000000	12.000000	2.0	0.333333	30.000000	30.000000

	0.0	1.0	2.0	accuracy	macro avg	weighted avg
precision	0.750000	0.125	0.0	0.5	0.291667	0.537500
recall	0.642857	0.500	0.0	0.5	0.380952	0.500000
f1-score	0.692308	0.200	0.0	0.5	0.297436	0.504615
support	14.000000	2.000	4.0	0.5	20.000000	20.000000

	0.0	2.0	3.0	accuracy	macro avg	weighted avg
precision	0.833333	0.0	0.0	0.833333	0.277778	0.694444
recall	1.000000	0.0	0.0	0.833333	0.333333	0.833333
f1-score	0.909091	0.0	0.0	0.833333	0.303030	0.757576
support	15.000000	2.0	1.0	0.833333	18.000000	18.000000